所以我們這裡透過 sklearn 來快速調用。
簡單來說,只要 10 個步驟就可以用 Python 完成模型建置。
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
# Load the dataset from a CSV file (example input path).
import pandas as pd

data = pd.read_csv('data.csv')

# Separate the target column from the remaining feature columns.
y = data['target']
X = data.drop(columns='target')

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply the
# same fitted scaler to both the training and the test rows.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
白話來說,想換模型時,只要修改這裡所使用的模型即可。
6. 選擇模型:選擇適合你問題的模型。例如,這裡使用了一個隨機森林分類器。
# Build a 100-tree random forest (seeded for reproducibility) and train it
# on the scaled training data; fit() returns the fitted estimator itself.
model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Predict the held-out rows and compare the predictions with the true labels.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'準確率:{accuracy}')
# Search for the best hyper-parameters with cross-validated grid search.
from sklearn.model_selection import GridSearchCV

# Candidate forest sizes to compare.
param_grid = {'n_estimators': [50, 100, 200]}

# Evaluate each candidate with 5-fold cross-validation on the training data;
# fit() returns the fitted search object itself.
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=5,
).fit(X_train, y_train)

# Keep the best estimator found by the search and use it to predict the
# held-out test rows.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)